The Office of Advocacy’s Small Business Profiles are an annual analysis of each state’s small business activities. Each profile gathers the latest information from key federal data-gathering agencies to provide a snapshot of small business health and economic activity. This year’s profiles report on state economic growth and employment; small business employment, industry composition, and turnover; plus business owner demographics and county-level employment change.
from IPython.core.display import display, HTML
display(HTML("""<style> .container {width:96% !important;}</style>"""))
from IPython.display import IFrame
import pandas as pd
import multiprocessing
import numpy as np
from multiprocessing.dummy import Pool as ThreadPool
from functools import partial
import math
# Handle s3 or local
import s3fs
from os import listdir
from os.path import isfile, join
import subprocess
# Analysis
from plotly.offline import init_notebook_mode, iplot
import cufflinks as cf
init_notebook_mode()
cf.go_offline()
import sys
sys.path.insert(0,'../')
from Tools.paths import *
def list_files(path, ext='pdf'):
    """Return the names, without extension, of the ``.ext`` files found in ``path``.

    Parameters
    ----------
    path : str
        Location to list. A value starting with ``s3://`` is listed through
        the ``aws s3 ls`` command line tool; anything else is treated as a
        local directory.
    ext : str
        File extension to keep, without the leading dot (default ``'pdf'``).

    Returns
    -------
    list of str
        One entry per matching file, with the trailing ``.ext`` removed.
    """
    suffix = '.{}'.format(ext)
    if path.startswith('s3://'):
        # List the location that was actually requested (not a module-level
        # global). universal_newlines=True makes check_output return text
        # rather than bytes, so the split below works on Python 2 and 3.
        listing = subprocess.check_output(['aws', 's3', 'ls', path],
                                          universal_newlines=True)
        # The last space-separated token of each output line is taken as the
        # object name.
        candidates = [line.split(" ")[-1] for line in listing.split('\n')]
    else:
        # Keep regular files only; sub-directories are ignored.
        candidates = [f for f in listdir(path) if isfile(join(path, f))]
    # Strip the extension from the end of the name only, so a name that
    # contains ".ext" elsewhere is left intact.
    return [f[:-len(suffix)] for f in candidates if f.endswith(suffix)]
def path(path, name, ext='pdf'):
    """Build a file location as ``<path><name>.<ext>``.

    ``path`` is used verbatim as a prefix (no separator is inserted), so it
    must already end with one when a separator is wanted.
    """
    return '{}{}.{}'.format(path, name, ext)
# Load the per-industry table (semicolon-separated) and discard the rows whose
# Industry value is 'Total'.
industry = pd.read_csv(path(path_s3_out, 'industry', 'csv'), sep=";")
industry = industry.loc[industry['Industry'] != 'Total']
industry.head()
# Sum the two firm-count columns per industry and draw them as a bar chart.
df_aux = (
    industry[['Industry', '1-499 Employees', 'Nonemployer Firms']]
    .groupby('Industry')
    .sum()
)
df_aux.iplot(kind='bar')
# Load the employment table (semicolon-separated).
employment = pd.read_csv(path(path_s3_out, 'employment', 'csv'), sep=";")
# This public dataset is used only as a lookup between state names and the
# two-letter codes required by the choropleth ('code' and 'state' columns).
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_us_ag_exports.csv')
df.head()
# Build the lookup in one expression: calling drop_duplicates(inplace=True)
# on a column slice can raise pandas' SettingWithCopyWarning.
df = df[['code', 'state']].drop_duplicates()
employment.head()
# Aggregate to one row per State value.
df_aux = employment.groupby('State').sum()
df_aux = df_aux.reset_index()
# Normalise the State value: underscores become spaces and anything from the
# first '.' onwards is dropped.
df_aux['State'] = df_aux.State.apply(lambda x: x.replace("_", " ").split(".")[0])
# Shapes are printed before and after the left merge so an unexpected change
# in row count is visible. The parenthesised form runs on Python 2 and 3.
print(df_aux.shape)
df_aux = df_aux.merge(df, left_on='State', right_on='state', how='left')
print(df_aux.shape)
# Light-to-dark purple colour scale as [position between 0 and 1, colour] pairs.
scl = [[0.0, 'rgb(242,240,247)'], [0.2, 'rgb(218,218,235)'], [0.4, 'rgb(188,189,220)'],
       [0.6, 'rgb(158,154,200)'], [0.8, 'rgb(117,107,177)'], [1.0, 'rgb(84,39,143)']]
# The text attached to each location is the state name.
df_aux['text'] = df_aux['State']
data = [dict(
    type='choropleth',
    colorscale=scl,
    autocolorscale=False,
    # Two-letter codes brought in by the merge with the state lookup table.
    locations=df_aux['code'],
    z=df_aux['Small Business Employment'].astype(float),
    locationmode='USA-states',
    text=df_aux['text'],
    marker=dict(
        # White borders between states.
        line=dict(
            color='rgb(255,255,255)',
            width=2
        )),
    colorbar=dict(
        # The mapped column is an employment figure, not a dollar amount, so
        # the previous "Millions USD" label was misleading.
        title="Employment")
)]
layout = dict(
    title='2016 USA Small Business Employment by State',
    geo=dict(
        scope='usa',
        projection=dict(type='albers usa'),
        showlakes=True,
        lakecolor='rgb(255, 255, 255)'),
)
fig = dict(data=data, layout=layout)
iplot(fig, filename='d3-cloropleth-map')
# Load the demographic table (semicolon-separated).
demographic = pd.read_csv(path(path_s3_out, 'demographic', 'csv'), sep=";")
demographic.head()
# NOTE: df_aux is the same object as demographic at this point, so the State
# clean-up below also modifies demographic.
df_aux = demographic
# Normalise the State value: underscores become spaces and anything from the
# first '.' onwards is dropped.
df_aux['State'] = df_aux.State.apply(lambda x: x.replace("_", " ").split(".")[0])
# Shapes are printed before and after the left merge so an unexpected change
# in row count is visible. The parenthesised form runs on Python 2 and 3.
print(df_aux.shape)
df_aux = df_aux.merge(df, left_on='State', right_on='state', how='left')
print(df_aux.shape)
# Light-to-dark blue colour scale as [position between 0 and 1, colour] pairs.
scl = [
    [0.0, 'rgb(204,229,255)'],
    [0.2, 'rgb(153,204,255)'],
    [0.4, 'rgb(102,178,255)'],
    [0.6, 'rgb(51,153,255)'],
    [0.8, 'rgb(0,128,255)'],
    [1.0, 'rgb(0,102,204)'],
]
# The text attached to each location is the state name.
df_aux['text'] = df_aux['State']
# Single choropleth trace coloured by the 'Nonminority-owned' column.
data = [{
    'type': 'choropleth',
    'colorscale': scl,
    'autocolorscale': False,
    'locations': df_aux['code'],
    'z': df_aux['Nonminority-owned'].astype(float),
    'locationmode': 'USA-states',
    'text': df_aux['text'],
    # White borders between states.
    'marker': {'line': {'color': 'rgb(255,255,255)', 'width': 2}},
    'colorbar': {'title': "%"},
}]
layout = {
    'title': '2016 USA Non-minority changes in business ownership by State',
    'geo': {
        'scope': 'usa',
        'projection': {'type': 'albers usa'},
        'showlakes': True,
        'lakecolor': 'rgb(255, 255, 255)',
    },
}
fig = {'data': data, 'layout': layout}
iplot(fig, filename='d3-cloropleth-map')